5 classes of cervical cells¶

"Dyskeratotic", "Koilocytotic": abnormal but not malignant¶

"Metaplastic": benign¶

"Parabasal", "Superficial-Intermediate": normal cells¶

In [ ]:
 
In [ ]:
 
In [ ]:
# Importing Necessary Libraries
import cv2
import os
import shutil
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")
In [ ]:
# import shutil

# # Clear the existing mount point
# shutil.rmtree('/content/drive')

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-3-de7c54a020a6> in <cell line: 8>()
      6 # Mount Google Drive
      7 from google.colab import drive
----> 8 drive.mount('/content/drive')

/usr/local/lib/python3.10/dist-packages/google/colab/drive.py in mount(mountpoint, force_remount, timeout_ms, readonly)
     98 def mount(mountpoint, force_remount=False, timeout_ms=120000, readonly=False):
     99   """Mount your Google Drive at the specified mountpoint path."""
--> 100   return _mount(
    101       mountpoint,
    102       force_remount=force_remount,

/usr/local/lib/python3.10/dist-packages/google/colab/drive.py in _mount(mountpoint, force_remount, timeout_ms, ephemeral, readonly)
    281             'https://research.google.com/colaboratory/faq.html#drive-timeout'
    282         )
--> 283       raise ValueError('mount failed' + extra_reason)
    284     elif case == 4:
    285       # Terminate the DriveFS binary before killing bash.

ValueError: mount failed

Format dataset. Since we will only use "Image Features", focus on .bmp.¶

Note: in the dataset, there are Cell Features, Image Features, and Deep Features.¶

In [ ]:
# Function for Formatting Dataset
def FormatDataset(dataset_src, dataset_dest, classes):
    # Making a Copy of Dataset
    new_cropped_dest = [os.path.join(dataset_dest, cls, 'CROPPED') for cls in classes]
    new_complete_dest = [os.path.join(dataset_dest, cls, 'COMPLETE') for cls in classes]
    cropped_src = [ dataset_src + "/im_" + cls + "/im_" + cls + "/CROPPED" for cls in classes ]
    complete_src = [ dataset_src + "/im_" + cls + "/im_" + cls for cls in classes ]
    for (dest1, dest2) in zip(new_cropped_dest, new_complete_dest):
        os.makedirs(dest1, exist_ok=True)
        os.makedirs(dest2, exist_ok=True)
    # Formating Cropped Images
    for (src, new_dest) in zip(cropped_src, new_cropped_dest):
        for file in os.listdir(src):
            filename, file_ext = os.path.splitext(file)
            if file_ext == '.bmp':
                img_des = os.path.join(new_dest, filename + '.jpg')
                img = cv2.imread(os.path.join(src, file))
                img = cv2.resize(img, (64, 64))
                img = cv2.copyMakeBorder(img, 1, 1, 1, 1, cv2.BORDER_CONSTANT, value=0)
                img = cv2.blur(img, (2, 2))
                cv2.imwrite(img_des ,img)
    # Formatting Complete Images
    for (src, new_dest) in zip(complete_src, new_complete_dest):
        for file in os.listdir(src):
            filename, file_ext = os.path.splitext(file)
            if file_ext == '.bmp':
                img_des = os.path.join(new_dest, filename + '.jpg')
                img = cv2.imread(os.path.join(src, file))
                img = cv2.resize(img, (256, 256))
                img = cv2.copyMakeBorder(img, 2, 2, 2, 2, cv2.BORDER_CONSTANT, value=0)
                img = cv2.blur(img, (2, 2))
                cv2.imwrite(img_des, img)

# Source Location for Dataset
src = '/content/drive/Shareddrives/Computer Vision Final Project'
# Destination Location for Dataset
dest = '/content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer'
# Image Classes
classes = ["Dyskeratotic", "Koilocytotic", "Metaplastic", "Parabasal", "Superficial-Intermediate"]
# Formatting Dataset
FormatDataset(src, dest, classes)

image.png

Count the number of images from each class under COMPLETE and CROPPED seperately.¶

In [ ]:
import os
import matplotlib.pyplot as plt

root_dir = "/content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer"

classes = ["Dyskeratotic","Koilocytotic","Metaplastic","Parabasal","Superficial-Intermediate"]

def GetDatasetSize(path, classes, main="CROPPED"):
    num_of_image = {}
    total_images = 0
    for cls in classes:
        # Counting the Number of Files in the Folder
        num_files = len(os.listdir(os.path.join(path, cls, main)))
        num_of_image[cls] = num_files
        total_images += num_files
    return num_of_image, total_images

def plot_class_distribution(class_image_counts):
    classes = list(class_image_counts.keys())
    counts = list(class_image_counts.values())

    colors = ['orange' if cls in ["Dyskeratotic", "Koilocytotic"] else
              'yellow' if cls == "Metaplastic" else
              'green' for cls in classes]

    plt.figure(figsize=(10, 6))
    plt.bar(classes, counts, color=colors)
    plt.xlabel('Class')
    plt.ylabel('Number of Images')
    plt.title('Number of Images per Class')
    plt.xticks(rotation=45)
    plt.show()
In [ ]:
class_image_counts, total_images = GetDatasetSize(root_dir, classes, "COMPLETE")
print("COMPLETE")
print("Number of images per class:", class_image_counts)
print("Total number of images:", total_images)

# Plot the distribution
plot_class_distribution(class_image_counts)
COMPLETE
Number of images per class: {'Dyskeratotic': 223, 'Koilocytotic': 238, 'Metaplastic': 271, 'Parabasal': 108, 'Superficial-Intermediate': 126}
Total number of images: 966
In [ ]:
class_image_counts, total_images = GetDatasetSize(root_dir, classes, "CROPPED")
print("CROPPED")
print("Number of images per class:", class_image_counts)
print("Total number of images:", total_images)

# Plot the distribution
plot_class_distribution(class_image_counts)
CROPPED
Number of images per class: {'Dyskeratotic': 813, 'Koilocytotic': 825, 'Metaplastic': 793, 'Parabasal': 787, 'Superficial-Intermediate': 831}
Total number of images: 4049

Present 5 images from each class under COMPLETE and CROPPED seperately.¶

In [ ]:
import os
import cv2
import matplotlib.pyplot as plt


def display_images(path, classes, main="CROPPED", num_images=5):
    fig, axes = plt.subplots(len(classes), num_images, figsize=(15, 15))

    color_map = {
        "Dyskeratotic": "orange",
        "Koilocytotic": "orange",
        "Metaplastic": "yellow",
        "Parabasal": "green",
        "Superficial-Intermediate": "green"
    }

    for i, cls in enumerate(classes):
        cls_path = os.path.join(path, cls, main) # Construct the path to the directory containing the images for the current class.
        images = os.listdir(cls_path)[:num_images]
        for j, image_file in enumerate(images):
            img_path = os.path.join(cls_path, image_file)
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # Convert the image from BGR (OpenCV default) to RGB (matplotlib default).
            axes[i, j].imshow(img) # Display the image in the appropriate subplot.
            axes[i, j].axis('off')
            if j == 0: # Set the title of the first image in each row to the class name.
                axes[i, j].set_title(cls, color='black', bbox=dict(facecolor=color_map[cls], edgecolor='none', pad=5))

    plt.tight_layout()
    plt.show()
In [ ]:
print("COMPLETE")
display_images(root_dir, classes, "COMPLETE", 5)
COMPLETE
In [ ]:
print("CROPPED")
display_images(root_dir, classes, "CROPPED", 5)
CROPPED

Split First: Split the CROPPED dataset into train, validation, and test sets.¶

Augment Training Set: Perform data augmentation only on the training set.¶

This way, we maintain the integrity of the validation and test sets while expanding the training data through augmentation.¶

If augmentation and then split dataset, there's a risk that augmented versions of the same image may end up in both the training and validation/test sets, which can lead to overfitting and overly optimistic performance estimates.¶

Split the Dataset into train, validation, and test for CROPPED¶

70% for Train Data¶

15% for Validation Data¶

15% for Testing Data¶

In [ ]:
import os
import shutil
import numpy as np

# Function for Creating Train / Validation / Test folders (One time use Only)
def TrainValTestSplit(root_dir, classes_dir, main="CROPPED", val_ratio=0.15, test_ratio=0.15):
    for cls in classes_dir:
        # Creating Split Folders inside the root_dir
        # For each class, create directories for training, validation, and test sets inside root_dir.
        os.makedirs(os.path.join(root_dir, 'train', cls), exist_ok=True)
        os.makedirs(os.path.join(root_dir, 'val', cls), exist_ok=True)
        os.makedirs(os.path.join(root_dir, 'test', cls), exist_ok=True)

        # Folder to copy images from
        src = os.path.join(root_dir, cls, main)

        # Splitting the Files in the Given ratio
        # List all file names in the source directory.
        allFileNames = os.listdir(src)
        # Shuffle the file names to randomize the order.
        np.random.shuffle(allFileNames)
        # Split the file names into training, validation, and testing sets based on the specified ratios.
        train_FileNames, val_FileNames, test_FileNames = np.split(
            np.array(allFileNames),
            [int(len(allFileNames) * (1 - (val_ratio + test_ratio))), int(len(allFileNames) * (1 - test_ratio))]
        )

        # Convert the file names into full file paths for training, validation, and testing sets.
        train_FileNames = [os.path.join(src, name) for name in train_FileNames.tolist()]
        val_FileNames = [os.path.join(src, name) for name in val_FileNames.tolist()]
        test_FileNames = [os.path.join(src, name) for name in test_FileNames.tolist()]

        # Printing the Split Details
        # Print the number of total images, training images, validation images, and testing images for each class.
        print(cls, ':')
        print('Total images: ', len(allFileNames))
        print('Training: ', len(train_FileNames))
        print('Validation: ', len(val_FileNames))
        print('Testing: ', len(test_FileNames))

        # Copy-pasting images to respective directories
        # Copy each image to its respective directory (train, val, or test) based on the split.
        for name in train_FileNames:
            shutil.copy(name, os.path.join(root_dir, 'train', cls))

        for name in val_FileNames:
            shutil.copy(name, os.path.join(root_dir, 'val', cls))

        for name in test_FileNames:
            shutil.copy(name, os.path.join(root_dir, 'test', cls))
        print()

# Performing Train / Validation / Test Split
root_dir = "/content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer"
classes = ["Dyskeratotic", "Koilocytotic", "Metaplastic", "Parabasal", "Superficial-Intermediate"]

TrainValTestSplit(root_dir, classes)
Dyskeratotic :
Total images:  813
Training:  569
Validation:  122
Testing:  122

Koilocytotic :
Total images:  825
Training:  577
Validation:  124
Testing:  124

Metaplastic :
Total images:  793
Training:  555
Validation:  119
Testing:  119

Parabasal :
Total images:  787
Training:  550
Validation:  118
Testing:  119

Superficial-Intermediate :
Total images:  831
Training:  581
Validation:  125
Testing:  125

Another way to count the number of images. It lines up with counts above.¶

In [ ]:
import os

# Function to count number of images in each class directory for train, val, and test
def count_images_in_split_dirs(root_dir, classes):
    splits = ['train', 'val', 'test']
    counts = {split: {cls: 0 for cls in classes} for split in splits}

    for split in splits:
        for cls in classes:
            class_dir = os.path.join(root_dir, split, cls)
            if os.path.exists(class_dir):
                counts[split][cls] = len(os.listdir(class_dir))
            else:
                counts[split][cls] = 0

    return counts

# Define the root directory and classes
root_dir = "/content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer"
classes = ["Dyskeratotic", "Koilocytotic", "Metaplastic", "Parabasal", "Superficial-Intermediate"]

# Get the counts of images
image_counts = count_images_in_split_dirs(root_dir, classes)

# Print the counts
for split in image_counts:
    print(f"\n{split.upper()}:")
    for cls in image_counts[split]:
        print(f"  {cls}: {image_counts[split][cls]} images")
TRAIN:
  Dyskeratotic: 569 images
  Koilocytotic: 577 images
  Metaplastic: 555 images
  Parabasal: 550 images
  Superficial-Intermediate: 581 images

VAL:
  Dyskeratotic: 122 images
  Koilocytotic: 124 images
  Metaplastic: 119 images
  Parabasal: 118 images
  Superficial-Intermediate: 125 images

TEST:
  Dyskeratotic: 122 images
  Koilocytotic: 124 images
  Metaplastic: 119 images
  Parabasal: 119 images
  Superficial-Intermediate: 125 images

Building the basic CNN model¶

In [ ]:
# Importing Keras for Image Classification
import keras
from keras.layers import Dense,Conv2D, Flatten, MaxPool2D, Dropout
from keras.models import Sequential
from keras.preprocessing import image
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
In [ ]:
# CNN Model

model = Sequential()
# Convolutional Layer with input shape (64,64,3)
model.add(Conv2D(filters=16, kernel_size= (3,3), activation= 'relu', input_shape=(64,64,3)) )

model.add(Conv2D(filters=32, kernel_size=(3,3), activation='relu' ))
model.add(MaxPool2D(pool_size=(2,2)))


model.add(Conv2D(filters=64, kernel_size=(3,3), activation='relu' ))
model.add(MaxPool2D(pool_size=(2,2)))


model.add(Conv2D(filters=128, kernel_size=(3,3), activation='relu' ))
model.add(MaxPool2D(pool_size=(2,2)))

model.add(Dropout(rate=0.25))

model.add(Flatten())
model.add(Dense(units=64, activation='relu'))
model.add(Dropout(rate=0.25))
model.add(Dense(units=5, activation='sigmoid'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']  )

model.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d (Conv2D)             (None, 62, 62, 16)        448       
                                                                 
 conv2d_1 (Conv2D)           (None, 60, 60, 32)        4640      
                                                                 
 max_pooling2d (MaxPooling2  (None, 30, 30, 32)        0         
 D)                                                              
                                                                 
 conv2d_2 (Conv2D)           (None, 28, 28, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 14, 14, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_3 (Conv2D)           (None, 12, 12, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 6, 6, 128)         0         
 g2D)                                                            
                                                                 
 dropout (Dropout)           (None, 6, 6, 128)         0         
                                                                 
 flatten (Flatten)           (None, 4608)              0         
                                                                 
 dense (Dense)               (None, 64)                294976    
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 5)                 325       
                                                                 
=================================================================
Total params: 392741 (1.50 MB)
Trainable params: 392741 (1.50 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________

Data augmentation using Keras' ImageDataGenerator class¶

Data augmentation is a technique used to artificially expand the size of a dataset by creating modified versions of images in the dataset. This helps improve the robustness and generalization ability of machine learning models, especially in tasks like image classification.¶

Ensure that all images fed into the model during training, validation, and testing have their pixel values normalized to the range [0, 1]¶

In [ ]:
# Expand the size of dataset with new transformed images from the original dataset using ImageDataGenerator.

train_datagen = image.ImageDataGenerator(zoom_range = 0.2, shear_range = 0.2 , rescale = 1./255 , horizontal_flip=True)

val_datagen = image.ImageDataGenerator(rescale = 1./255)
test_datagen = image.ImageDataGenerator(rescale = 1./255)
In [ ]:
# The train_data object is an instance of a Keras DirectoryIterator, which generates batches of data from the specified directory.
# The flow_from_directory method reads images from the specified directory and applies the transformations defined in the train_datagen object (such as augmentation and normalization).
# Resizes Images: All images are resized to 64x64 pixels.
# Batch Processing: Images are processed and yielded in batches of 100.
# Categorical Labels: The labels for the images are one-hot encoded.
# The flow_from_directory function is a powerful way to generate batches of tensor image data with real-time data augmentation. It is especially useful when having a large dataset organized into subdirectories by class. The function reads the images, applies the specified preprocessing steps, and yields them in batches for training the neural network.

train_data = train_datagen.flow_from_directory(directory= root_dir + "/train", target_size=(64, 64), batch_size=100, class_mode = 'categorical')
Found 2832 images belonging to 5 classes.
In [ ]:
train_data.class_indices
Out[ ]:
{'Dyskeratotic': 0,
 'Koilocytotic': 1,
 'Metaplastic': 2,
 'Parabasal': 3,
 'Superficial-Intermediate': 4}
In [ ]:
val_data = val_datagen.flow_from_directory(directory= root_dir + "/val", target_size=(64, 64), batch_size=100, class_mode = 'categorical')
Found 608 images belonging to 5 classes.
In [ ]:
test_data = test_datagen.flow_from_directory(directory= root_dir + "/test", target_size=(64, 64), batch_size=100, class_mode = 'categorical')
Found 609 images belonging to 5 classes.
In [ ]:
# Adding Model check point Callback
# This callback is used during the training process to save the model weights. It monitors the validation accuracy and saves the model only if there is an improvement.

from tensorflow.keras.callbacks import ModelCheckpoint

# Define the full filepath for saving the best model

filepath = os.path.join(root_dir, "cervical_cancer_best_model.hdf5")

# Adding Model Checkpoint Callback

mc = ModelCheckpoint(
    filepath=filepath,
    monitor='val_accuracy',
    verbose=1, # When set to 1, the callback will print messages when the model is being saved.
    save_best_only=True, # When set to True, the callback saves the model only when the monitored metric (val_accuracy) improves. This ensures that only the best model, in terms of validation accuracy, is saved.
    mode='auto'
)
call_back = [mc]
In [ ]:
# Fitting the Model

# steps_per_epoch = 28 (how many batches on one epoch)
# This parameter defines the number of batches of samples to be used in each epoch.
# Essentially, it is the number of times the model will be updated in one epoch.
# Since the batch size is 100, then 28 steps per epoch mean that the model will see 2800 (28 * 100) samples in one epoch.

# validation_steps=6
# This parameter defines the number of batches of samples to be used in each validation epoch.
# This means that in each epoch, the model will see 600 (6 * 100) samples from the validation set.

cnn = model.fit(train_data,
                  steps_per_epoch= 28,
                  epochs= 64,
                  validation_data= val_data,
                  validation_steps= 6,
                  callbacks = call_back )
Epoch 1/64
28/28 [==============================] - ETA: 0s - loss: 1.4633 - accuracy: 0.3034  
Epoch 1: val_accuracy improved from -inf to 0.37333, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 2349s 84s/step - loss: 1.4633 - accuracy: 0.3034 - val_loss: 1.2985 - val_accuracy: 0.3733
Epoch 2/64
28/28 [==============================] - ETA: 0s - loss: 1.1777 - accuracy: 0.5190
Epoch 2: val_accuracy improved from 0.37333 to 0.65667, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 11s 383ms/step - loss: 1.1777 - accuracy: 0.5190 - val_loss: 0.9799 - val_accuracy: 0.6567
Epoch 3/64
28/28 [==============================] - ETA: 0s - loss: 1.0051 - accuracy: 0.6080
Epoch 3: val_accuracy improved from 0.65667 to 0.66000, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 12s 415ms/step - loss: 1.0051 - accuracy: 0.6080 - val_loss: 0.9196 - val_accuracy: 0.6600
Epoch 4/64
28/28 [==============================] - ETA: 0s - loss: 0.9078 - accuracy: 0.6633
Epoch 4: val_accuracy improved from 0.66000 to 0.73167, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 11s 408ms/step - loss: 0.9078 - accuracy: 0.6633 - val_loss: 0.7619 - val_accuracy: 0.7317
Epoch 5/64
28/28 [==============================] - ETA: 0s - loss: 0.8248 - accuracy: 0.6903
Epoch 5: val_accuracy did not improve from 0.73167
28/28 [==============================] - 10s 370ms/step - loss: 0.8248 - accuracy: 0.6903 - val_loss: 0.7761 - val_accuracy: 0.6883
Epoch 6/64
28/28 [==============================] - ETA: 0s - loss: 0.7842 - accuracy: 0.7072
Epoch 6: val_accuracy did not improve from 0.73167
28/28 [==============================] - 11s 392ms/step - loss: 0.7842 - accuracy: 0.7072 - val_loss: 0.7119 - val_accuracy: 0.7317
Epoch 7/64
28/28 [==============================] - ETA: 0s - loss: 0.7546 - accuracy: 0.7204
Epoch 7: val_accuracy improved from 0.73167 to 0.74667, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 11s 409ms/step - loss: 0.7546 - accuracy: 0.7204 - val_loss: 0.6603 - val_accuracy: 0.7467
Epoch 8/64
28/28 [==============================] - ETA: 0s - loss: 0.6902 - accuracy: 0.7401
Epoch 8: val_accuracy improved from 0.74667 to 0.77000, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 11s 410ms/step - loss: 0.6902 - accuracy: 0.7401 - val_loss: 0.6111 - val_accuracy: 0.7700
Epoch 9/64
28/28 [==============================] - ETA: 0s - loss: 0.6603 - accuracy: 0.7643
Epoch 9: val_accuracy did not improve from 0.77000
28/28 [==============================] - 11s 382ms/step - loss: 0.6603 - accuracy: 0.7643 - val_loss: 0.5964 - val_accuracy: 0.7683
Epoch 10/64
28/28 [==============================] - ETA: 0s - loss: 0.6631 - accuracy: 0.7566
Epoch 10: val_accuracy did not improve from 0.77000
28/28 [==============================] - 10s 373ms/step - loss: 0.6631 - accuracy: 0.7566 - val_loss: 0.6260 - val_accuracy: 0.7633
Epoch 11/64
28/28 [==============================] - ETA: 0s - loss: 0.6443 - accuracy: 0.7694
Epoch 11: val_accuracy improved from 0.77000 to 0.77667, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 12s 422ms/step - loss: 0.6443 - accuracy: 0.7694 - val_loss: 0.6339 - val_accuracy: 0.7767
Epoch 12/64
28/28 [==============================] - ETA: 0s - loss: 0.6400 - accuracy: 0.7629
Epoch 12: val_accuracy improved from 0.77667 to 0.80833, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 12s 428ms/step - loss: 0.6400 - accuracy: 0.7629 - val_loss: 0.5055 - val_accuracy: 0.8083
Epoch 13/64
28/28 [==============================] - ETA: 0s - loss: 0.5383 - accuracy: 0.8042
Epoch 13: val_accuracy did not improve from 0.80833
28/28 [==============================] - 11s 386ms/step - loss: 0.5383 - accuracy: 0.8042 - val_loss: 0.5701 - val_accuracy: 0.8033
Epoch 14/64
28/28 [==============================] - ETA: 0s - loss: 0.5567 - accuracy: 0.7961
Epoch 14: val_accuracy did not improve from 0.80833
28/28 [==============================] - 10s 371ms/step - loss: 0.5567 - accuracy: 0.7961 - val_loss: 0.5195 - val_accuracy: 0.7950
Epoch 15/64
28/28 [==============================] - ETA: 0s - loss: 0.5142 - accuracy: 0.8130
Epoch 15: val_accuracy improved from 0.80833 to 0.83333, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 11s 411ms/step - loss: 0.5142 - accuracy: 0.8130 - val_loss: 0.4631 - val_accuracy: 0.8333
Epoch 16/64
28/28 [==============================] - ETA: 0s - loss: 0.5498 - accuracy: 0.8115
Epoch 16: val_accuracy did not improve from 0.83333
28/28 [==============================] - 11s 387ms/step - loss: 0.5498 - accuracy: 0.8115 - val_loss: 0.4813 - val_accuracy: 0.8300
Epoch 17/64
28/28 [==============================] - ETA: 0s - loss: 0.4881 - accuracy: 0.8195
Epoch 17: val_accuracy did not improve from 0.83333
28/28 [==============================] - 11s 391ms/step - loss: 0.4881 - accuracy: 0.8195 - val_loss: 0.4872 - val_accuracy: 0.8217
Epoch 18/64
28/28 [==============================] - ETA: 0s - loss: 0.4603 - accuracy: 0.8397
Epoch 18: val_accuracy did not improve from 0.83333
28/28 [==============================] - 11s 398ms/step - loss: 0.4603 - accuracy: 0.8397 - val_loss: 0.4960 - val_accuracy: 0.8100
Epoch 19/64
28/28 [==============================] - ETA: 0s - loss: 0.4806 - accuracy: 0.8280
Epoch 19: val_accuracy did not improve from 0.83333
28/28 [==============================] - 11s 384ms/step - loss: 0.4806 - accuracy: 0.8280 - val_loss: 0.5149 - val_accuracy: 0.8183
Epoch 20/64
28/28 [==============================] - ETA: 0s - loss: 0.4251 - accuracy: 0.8463
Epoch 20: val_accuracy improved from 0.83333 to 0.86000, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 12s 426ms/step - loss: 0.4251 - accuracy: 0.8463 - val_loss: 0.3852 - val_accuracy: 0.8600
Epoch 21/64
28/28 [==============================] - ETA: 0s - loss: 0.4085 - accuracy: 0.8400
Epoch 21: val_accuracy did not improve from 0.86000
28/28 [==============================] - 11s 394ms/step - loss: 0.4085 - accuracy: 0.8400 - val_loss: 0.4046 - val_accuracy: 0.8600
Epoch 22/64
28/28 [==============================] - ETA: 0s - loss: 0.4032 - accuracy: 0.8463
Epoch 22: val_accuracy improved from 0.86000 to 0.86333, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 12s 417ms/step - loss: 0.4032 - accuracy: 0.8463 - val_loss: 0.3837 - val_accuracy: 0.8633
Epoch 23/64
28/28 [==============================] - ETA: 0s - loss: 0.3785 - accuracy: 0.8624
Epoch 23: val_accuracy improved from 0.86333 to 0.88167, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 12s 414ms/step - loss: 0.3785 - accuracy: 0.8624 - val_loss: 0.3447 - val_accuracy: 0.8817
Epoch 24/64
28/28 [==============================] - ETA: 0s - loss: 0.3708 - accuracy: 0.8690
Epoch 24: val_accuracy did not improve from 0.88167
28/28 [==============================] - 10s 374ms/step - loss: 0.3708 - accuracy: 0.8690 - val_loss: 0.3363 - val_accuracy: 0.8817
Epoch 25/64
28/28 [==============================] - ETA: 0s - loss: 0.3848 - accuracy: 0.8576
Epoch 25: val_accuracy did not improve from 0.88167
28/28 [==============================] - 11s 382ms/step - loss: 0.3848 - accuracy: 0.8576 - val_loss: 0.3267 - val_accuracy: 0.8800
Epoch 26/64
28/28 [==============================] - ETA: 0s - loss: 0.3527 - accuracy: 0.8697
Epoch 26: val_accuracy did not improve from 0.88167
28/28 [==============================] - 11s 384ms/step - loss: 0.3527 - accuracy: 0.8697 - val_loss: 0.3511 - val_accuracy: 0.8700
Epoch 27/64
28/28 [==============================] - ETA: 0s - loss: 0.3730 - accuracy: 0.8664
Epoch 27: val_accuracy did not improve from 0.88167
28/28 [==============================] - 10s 370ms/step - loss: 0.3730 - accuracy: 0.8664 - val_loss: 0.3945 - val_accuracy: 0.8517
Epoch 28/64
28/28 [==============================] - ETA: 0s - loss: 0.3521 - accuracy: 0.8748
Epoch 28: val_accuracy did not improve from 0.88167
28/28 [==============================] - 11s 380ms/step - loss: 0.3521 - accuracy: 0.8748 - val_loss: 0.3644 - val_accuracy: 0.8783
Epoch 29/64
28/28 [==============================] - ETA: 0s - loss: 0.3419 - accuracy: 0.8770
Epoch 29: val_accuracy improved from 0.88167 to 0.89000, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 12s 415ms/step - loss: 0.3419 - accuracy: 0.8770 - val_loss: 0.3061 - val_accuracy: 0.8900
Epoch 30/64
28/28 [==============================] - ETA: 0s - loss: 0.3269 - accuracy: 0.8843
Epoch 30: val_accuracy did not improve from 0.89000
28/28 [==============================] - 10s 371ms/step - loss: 0.3269 - accuracy: 0.8843 - val_loss: 0.3502 - val_accuracy: 0.8800
Epoch 31/64
28/28 [==============================] - ETA: 0s - loss: 0.3262 - accuracy: 0.8810
Epoch 31: val_accuracy did not improve from 0.89000
28/28 [==============================] - 11s 377ms/step - loss: 0.3262 - accuracy: 0.8810 - val_loss: 0.3315 - val_accuracy: 0.8817
Epoch 32/64
28/28 [==============================] - ETA: 0s - loss: 0.3037 - accuracy: 0.8957
Epoch 32: val_accuracy improved from 0.89000 to 0.89333, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 12s 412ms/step - loss: 0.3037 - accuracy: 0.8957 - val_loss: 0.3035 - val_accuracy: 0.8933
Epoch 33/64
28/28 [==============================] - ETA: 0s - loss: 0.2977 - accuracy: 0.8902
Epoch 33: val_accuracy did not improve from 0.89333
28/28 [==============================] - 10s 372ms/step - loss: 0.2977 - accuracy: 0.8902 - val_loss: 0.2949 - val_accuracy: 0.8883
Epoch 34/64
28/28 [==============================] - ETA: 0s - loss: 0.3060 - accuracy: 0.8946
Epoch 34: val_accuracy did not improve from 0.89333
28/28 [==============================] - 11s 382ms/step - loss: 0.3060 - accuracy: 0.8946 - val_loss: 0.3280 - val_accuracy: 0.8867
Epoch 35/64
28/28 [==============================] - ETA: 0s - loss: 0.2917 - accuracy: 0.9034
Epoch 35: val_accuracy improved from 0.89333 to 0.90333, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 12s 413ms/step - loss: 0.2917 - accuracy: 0.9034 - val_loss: 0.2756 - val_accuracy: 0.9033
Epoch 36/64
28/28 [==============================] - ETA: 0s - loss: 0.2866 - accuracy: 0.8997
Epoch 36: val_accuracy improved from 0.90333 to 0.91667, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 12s 423ms/step - loss: 0.2866 - accuracy: 0.8997 - val_loss: 0.2602 - val_accuracy: 0.9167
Epoch 37/64
28/28 [==============================] - ETA: 0s - loss: 0.2854 - accuracy: 0.8971
Epoch 37: val_accuracy did not improve from 0.91667
28/28 [==============================] - 11s 377ms/step - loss: 0.2854 - accuracy: 0.8971 - val_loss: 0.3099 - val_accuracy: 0.8867
Epoch 38/64
28/28 [==============================] - ETA: 0s - loss: 0.2817 - accuracy: 0.8996
Epoch 38: val_accuracy did not improve from 0.91667
28/28 [==============================] - 10s 373ms/step - loss: 0.2817 - accuracy: 0.8996 - val_loss: 0.2962 - val_accuracy: 0.8917
Epoch 39/64
28/28 [==============================] - ETA: 0s - loss: 0.2943 - accuracy: 0.8902
Epoch 39: val_accuracy did not improve from 0.91667
28/28 [==============================] - 11s 375ms/step - loss: 0.2943 - accuracy: 0.8902 - val_loss: 0.2777 - val_accuracy: 0.9067
Epoch 40/64
28/28 [==============================] - ETA: 0s - loss: 0.2632 - accuracy: 0.9081
Epoch 40: val_accuracy did not improve from 0.91667
28/28 [==============================] - 11s 380ms/step - loss: 0.2632 - accuracy: 0.9081 - val_loss: 0.2558 - val_accuracy: 0.9117
Epoch 41/64
28/28 [==============================] - ETA: 0s - loss: 0.2694 - accuracy: 0.9056
Epoch 41: val_accuracy did not improve from 0.91667
28/28 [==============================] - 11s 394ms/step - loss: 0.2694 - accuracy: 0.9056 - val_loss: 0.3021 - val_accuracy: 0.8933
Epoch 42/64
28/28 [==============================] - ETA: 0s - loss: 0.2726 - accuracy: 0.9023
Epoch 42: val_accuracy did not improve from 0.91667
28/28 [==============================] - 11s 381ms/step - loss: 0.2726 - accuracy: 0.9023 - val_loss: 0.2703 - val_accuracy: 0.9117
Epoch 43/64
28/28 [==============================] - ETA: 0s - loss: 0.2662 - accuracy: 0.8979
Epoch 43: val_accuracy did not improve from 0.91667
28/28 [==============================] - 11s 380ms/step - loss: 0.2662 - accuracy: 0.8979 - val_loss: 0.2727 - val_accuracy: 0.9017
Epoch 44/64
28/28 [==============================] - ETA: 0s - loss: 0.2549 - accuracy: 0.9096
Epoch 44: val_accuracy did not improve from 0.91667
28/28 [==============================] - 11s 380ms/step - loss: 0.2549 - accuracy: 0.9096 - val_loss: 0.3719 - val_accuracy: 0.8850
Epoch 45/64
28/28 [==============================] - ETA: 0s - loss: 0.2745 - accuracy: 0.9026
Epoch 45: val_accuracy did not improve from 0.91667
28/28 [==============================] - 10s 370ms/step - loss: 0.2745 - accuracy: 0.9026 - val_loss: 0.2903 - val_accuracy: 0.9050
Epoch 46/64
28/28 [==============================] - ETA: 0s - loss: 0.2295 - accuracy: 0.9184
Epoch 46: val_accuracy did not improve from 0.91667
28/28 [==============================] - 11s 379ms/step - loss: 0.2295 - accuracy: 0.9184 - val_loss: 0.2614 - val_accuracy: 0.9067
Epoch 47/64
28/28 [==============================] - ETA: 0s - loss: 0.2235 - accuracy: 0.9151
Epoch 47: val_accuracy did not improve from 0.91667
28/28 [==============================] - 11s 381ms/step - loss: 0.2235 - accuracy: 0.9151 - val_loss: 0.2556 - val_accuracy: 0.9067
Epoch 48/64
28/28 [==============================] - ETA: 0s - loss: 0.2262 - accuracy: 0.9187
Epoch 48: val_accuracy did not improve from 0.91667
28/28 [==============================] - 10s 374ms/step - loss: 0.2262 - accuracy: 0.9187 - val_loss: 0.2536 - val_accuracy: 0.9150
Epoch 49/64
28/28 [==============================] - ETA: 0s - loss: 0.2191 - accuracy: 0.9173
Epoch 49: val_accuracy did not improve from 0.91667
28/28 [==============================] - 11s 376ms/step - loss: 0.2191 - accuracy: 0.9173 - val_loss: 0.2559 - val_accuracy: 0.9167
Epoch 50/64
28/28 [==============================] - ETA: 0s - loss: 0.2098 - accuracy: 0.9246
Epoch 50: val_accuracy did not improve from 0.91667
28/28 [==============================] - 11s 381ms/step - loss: 0.2098 - accuracy: 0.9246 - val_loss: 0.2556 - val_accuracy: 0.9133
Epoch 51/64
28/28 [==============================] - ETA: 0s - loss: 0.1975 - accuracy: 0.9239
Epoch 51: val_accuracy did not improve from 0.91667
28/28 [==============================] - 11s 378ms/step - loss: 0.1975 - accuracy: 0.9239 - val_loss: 0.2566 - val_accuracy: 0.9150
Epoch 52/64
28/28 [==============================] - ETA: 0s - loss: 0.2054 - accuracy: 0.9272
Epoch 52: val_accuracy improved from 0.91667 to 0.92000, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 12s 412ms/step - loss: 0.2054 - accuracy: 0.9272 - val_loss: 0.2396 - val_accuracy: 0.9200
Epoch 53/64
28/28 [==============================] - ETA: 0s - loss: 0.2387 - accuracy: 0.9114
Epoch 53: val_accuracy did not improve from 0.92000
28/28 [==============================] - 11s 376ms/step - loss: 0.2387 - accuracy: 0.9114 - val_loss: 0.2456 - val_accuracy: 0.9183
Epoch 54/64
28/28 [==============================] - ETA: 0s - loss: 0.2033 - accuracy: 0.9242
Epoch 54: val_accuracy did not improve from 0.92000
28/28 [==============================] - 11s 385ms/step - loss: 0.2033 - accuracy: 0.9242 - val_loss: 0.2560 - val_accuracy: 0.9133
Epoch 55/64
28/28 [==============================] - ETA: 0s - loss: 0.1942 - accuracy: 0.9323
Epoch 55: val_accuracy did not improve from 0.92000
28/28 [==============================] - 10s 380ms/step - loss: 0.1942 - accuracy: 0.9323 - val_loss: 0.2748 - val_accuracy: 0.9133
Epoch 56/64
28/28 [==============================] - ETA: 0s - loss: 0.1995 - accuracy: 0.9264
Epoch 56: val_accuracy did not improve from 0.92000
28/28 [==============================] - 10s 366ms/step - loss: 0.1995 - accuracy: 0.9264 - val_loss: 0.3972 - val_accuracy: 0.8733
Epoch 57/64
28/28 [==============================] - ETA: 0s - loss: 0.2177 - accuracy: 0.9209
Epoch 57: val_accuracy did not improve from 0.92000
28/28 [==============================] - 11s 383ms/step - loss: 0.2177 - accuracy: 0.9209 - val_loss: 0.2530 - val_accuracy: 0.9150
Epoch 58/64
28/28 [==============================] - ETA: 0s - loss: 0.1948 - accuracy: 0.9308
Epoch 58: val_accuracy did not improve from 0.92000
28/28 [==============================] - 11s 381ms/step - loss: 0.1948 - accuracy: 0.9308 - val_loss: 0.2751 - val_accuracy: 0.9067
Epoch 59/64
28/28 [==============================] - ETA: 0s - loss: 0.1828 - accuracy: 0.9348
Epoch 59: val_accuracy improved from 0.92000 to 0.92833, saving model to /content/drive/Shareddrives/Computer Vision Final Project/CervicalCancer/cervical_cancer_best_model.hdf5
28/28 [==============================] - 12s 417ms/step - loss: 0.1828 - accuracy: 0.9348 - val_loss: 0.2280 - val_accuracy: 0.9283
Epoch 60/64
28/28 [==============================] - ETA: 0s - loss: 0.1699 - accuracy: 0.9385
Epoch 60: val_accuracy did not improve from 0.92833
28/28 [==============================] - 10s 364ms/step - loss: 0.1699 - accuracy: 0.9385 - val_loss: 0.2625 - val_accuracy: 0.9233
Epoch 61/64
28/28 [==============================] - ETA: 0s - loss: 0.1643 - accuracy: 0.9381
Epoch 61: val_accuracy did not improve from 0.92833
28/28 [==============================] - 10s 375ms/step - loss: 0.1643 - accuracy: 0.9381 - val_loss: 0.2678 - val_accuracy: 0.9183
Epoch 62/64
28/28 [==============================] - ETA: 0s - loss: 0.1835 - accuracy: 0.9268
Epoch 62: val_accuracy did not improve from 0.92833
28/28 [==============================] - 11s 378ms/step - loss: 0.1835 - accuracy: 0.9268 - val_loss: 0.2089 - val_accuracy: 0.9250
Epoch 63/64
28/28 [==============================] - ETA: 0s - loss: 0.1721 - accuracy: 0.9374
Epoch 63: val_accuracy did not improve from 0.92833
28/28 [==============================] - 11s 374ms/step - loss: 0.1721 - accuracy: 0.9374 - val_loss: 0.2228 - val_accuracy: 0.9250
Epoch 64/64
28/28 [==============================] - ETA: 0s - loss: 0.1624 - accuracy: 0.9374
Epoch 64: val_accuracy did not improve from 0.92833
28/28 [==============================] - 10s 372ms/step - loss: 0.1624 - accuracy: 0.9374 - val_loss: 0.2717 - val_accuracy: 0.9133
In [ ]:
# Loading the Best Fit Model
model = load_model(root_dir + "/cervical_cancer_best_model.hdf5")

Model Accuracy¶

In [ ]:
# Checking the Accuracy of the Model

accuracy = model.evaluate_generator(generator= test_data)[1]
print(f"The accuracy of your model is = {accuracy*100} %")
The accuracy of your model is = 92.77504086494446 %
In [ ]:
# [1]: This accesses the second element of the returned list, which corresponds to the accuracy of the model. The first element ([0]) is the loss.
In [ ]:
h =  cnn.history;
h.keys();
In [ ]:
# Ploting Accuracy In Training Set & Validation Set

plt.plot(h['accuracy'])
plt.plot(h['val_accuracy'] , c = "red")
plt.title("acc vs v-acc")
plt.show()
In [ ]:
# Ploting Loss In Training Set & Validation Set

plt.plot(h['loss'])
plt.plot(h['val_loss'] , c = "red")
plt.title("loss vs v-loss")
plt.show()
In [ ]:
def cancerPrediction(path):
    classes_dir = ["Dyskeratotic","Koilocytotic","Metaplastic","Parabasal","Superficial-Intermediate"]
    # Loading Image
    img = image.load_img(path, target_size=(64,64))
    # Normalizing Image
    norm_img = image.img_to_array(img)/255
    # Converting Image to Numpy Array
    input_arr_img = np.array([norm_img])
    # Getting Predictions
    pred = np.argmax(model.predict(input_arr_img))
    # Printing Model Prediction
    print(classes_dir[pred])

path = "/content/drive/Shareddrives/Computer Vision Final Project/im_Dyskeratotic/im_Dyskeratotic/CROPPED/002_04.bmp"
cancerPrediction(path)
1/1 [==============================] - 0s 446ms/step
Dyskeratotic
In [ ]:
 

For each epoch, for all the 2832 training images, they go through data augmentation? so basically the first epoch's images may look different than second epoch's images?¶

image.png

image.png

In [ ]:
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

# Get the true labels and predictions
y_true = test_data.classes

# Predict the probabilities for each class
y_pred = model.predict(test_data)

# Convert probabilities to class labels using np.argmax
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate the classification report
report = classification_report(y_true, y_pred_classes, target_names=test_data.class_indices.keys())
print("Classification Report:\n", report)

# Calculate and print the confusion matrix
cm = confusion_matrix(y_true, y_pred_classes)
print("Confusion Matrix:\n", cm)